# Feature Extraction with Univariate Statistical Tests (Chi-squared for classification)

import pandas
import numpy
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Load the data. Column names are supplied explicitly, so pandas labels the
# nine columns with them instead of reading names from the file.
names = [
    'preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class',
]
dataframe = pandas.read_csv('data.csv', names=names)

# Split the raw value matrix: the first eight columns are the candidate
# features, column index 8 ('class') is the target.
array = dataframe.values
X, Y = array[:, :8], array[:, 8]

# Chi-squared test: score each feature against the target and keep the
# four highest-scoring ones.
test = SelectKBest(chi2, k=4)
fit = test.fit(X, Y)

# Reduce X to the selected feature columns.
features = fit.transform(X)

# Per-feature chi-squared scores, printed with three decimal places.
numpy.set_printoptions(precision=3)
print(fit.scores_)

# First five rows of the reduced feature matrix.
print(features[:5])